Abstract
Background We have run the simplified Naomi model using a range of inference methods.
Task In this report, we compare the accuracy of the posterior distributions obtained from these inference methods.
We compare the inference results from TMB, aghq and tmbstan.
Import these inference results as follows:
tmb <- readRDS("depends/tmb.rds")
aghq <- readRDS("depends/aghq.rds")
adam <- readRDS("depends/adam.rds")
tmbstan <- readRDS("depends/tmbstan.rds")
For more information about the conditions under which these results were generated, see:
depends <- yaml::read_yaml("orderly.yml")$depends
for(i in seq_along(depends)) {
report_name <- names(depends[[i]])
print(paste0("Inference results obtained from ", report_name, " with the query ", depends[[i]][[report_name]]$id))
report_id <- orderly::orderly_search(query = depends[[i]][[report_name]]$id, report_name)
print(paste0("Obtained report had ID ", report_id, " and was run with the following parameters:"))
print(orderly::orderly_info(report_id, report_name)$parameters)
}
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmb == TRUE)"
## [1] "Obtained report had ID 20230119-142551-23704e7b and was run with the following parameters:"
## $tmb
## [1] TRUE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $ndConstruction
## [1] "product"
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:aghq == TRUE && parameter:k == 1)"
## [1] "Obtained report had ID 20230119-170459-9b07786e and was run with the following parameters:"
## $aghq
## [1] TRUE
##
## $k
## [1] 1
##
## $ndConstruction
## [1] "product"
##
## $tmb
## [1] FALSE
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:adam == TRUE)"
## [1] "Obtained report had ID 20230204-171524-7233e968 and was run with the following parameters:"
## $adam
## [1] TRUE
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $ndConstruction
## [1] "product"
##
## $tmbstan
## [1] FALSE
##
## $niter
## [1] 1000
##
## $nthin
## [1] 1
##
## $nsample
## [1] 1000
##
## $area_level
## [1] 4
##
## [1] "Inference results obtained from naomi-simple_fit with the query latest(parameter:tmbstan == TRUE)"
## [1] "Obtained report had ID 20230114-142916-efabd65d and was run with the following parameters:"
## $tmbstan
## [1] TRUE
##
## $niter
## [1] 20000
##
## $nthin
## [1] 20
##
## $tmb
## [1] FALSE
##
## $aghq
## [1] FALSE
##
## $k
## [1] 2
##
## $nsample
## [1] 1000
All of the possible parameter names are as follows:
pars <- unique(names(tmb$fit$obj$env$par))
data.frame(
"TMB" = tmb$time,
"aghq" = aghq$time,
"adam" = adam$time,
"tmbstan" = tmbstan$time
)
adam <- adam$adam
beta_rho <- histogram_and_ecdf("beta_rho", i = 1, return_df = TRUE)
saveRDS(beta_rho$df, "beta_rho.rds")
betalapply(pars[stringr::str_starts(pars, "beta")], histogram_and_ecdf)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
logitlapply(pars[stringr::str_starts(pars, "logit")], histogram_and_ecdf)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
log_sigmalapply(pars[stringr::str_starts(pars, "log_sigma")], histogram_and_ecdf)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
##
## [[10]]
##
## [[11]]
##
## [[12]]
##
## [[13]]
##
## [[14]]
ks_helper <- function(par, ...) {
to_ks_df(par) %>% ks_plot(par, ...)
}
betaks_helper("beta")
ks_helper("beta", method1 = "TMB", method2 = "adam")
logitks_helper("logit")
ks_helper("logit", method1 = "TMB", method2 = "adam")
log_sigmaks_helper("log_sigma")
ks_helper("log_sigma", method1 = "TMB", method2 = "adam")
u_rho_xks_helper("u_rho_x")
ks_helper("u_rho_x", method1 = "TMB", method2 = "adam")
u_rho_xsks_helper("u_rho_xs")
ks_helper("u_rho_xs", method1 = "TMB", method2 = "adam")
us_rho_xks_helper("us_rho_x")
ks_helper("us_rho_x", method1 = "TMB", method2 = "adam")
us_rho_xsks_helper("us_rho_xs")
ks_helper("us_rho_xs", method1 = "TMB", method2 = "adam")
u_rho_aks_helper("u_rho_a")
ks_helper("u_rho_a", method1 = "TMB", method2 = "adam")
u_rho_asks_helper("u_rho_as")
ks_helper("u_rho_as", method1 = "TMB", method2 = "adam")
u_alpha_xks_helper("u_alpha_x")
ks_helper("u_alpha_x", method1 = "TMB", method2 = "adam")
u_alpha_xsks_helper("u_alpha_xs")
ks_helper("u_alpha_xs", method1 = "TMB", method2 = "adam")
us_alpha_xks_helper("us_alpha_x")
ks_helper("us_alpha_x", method1 = "TMB", method2 = "adam")
us_alpha_xsks_helper("us_alpha_xs")
ks_helper("us_alpha_xs", method1 = "TMB", method2 = "adam")
u_alpha_aks_helper("u_alpha_a")
ks_helper("u_alpha_a", method1 = "TMB", method2 = "adam")
u_alpha_asks_helper("u_alpha_as")
ks_helper("u_alpha_as", method1 = "TMB", method2 = "adam")
u_alpha_xaks_helper("u_alpha_xa")
ks_helper("u_alpha_xa", method1 = "TMB", method2 = "adam")
ui_anc_rho_xks_helper("ui_anc_rho_x")
ks_helper("ui_anc_rho_x", method1 = "TMB", method2 = "adam")
ui_anc_alpha_xks_helper("ui_anc_alpha_x")
ks_helper("ui_anc_alpha_x", method1 = "TMB", method2 = "adam")
log_or_gammaks_helper("log_or_gamma")
ks_helper("log_or_gamma", method1 = "TMB", method2 = "adam")
ks_summary <- lapply(unique(names(tmb$fit$obj$env$par)), function(x) {
to_ks_df(x) %>%
group_by(method) %>%
summarise(ks = mean(ks), par = x)
}) %>%
bind_rows() %>%
pivot_wider(names_from = "method", values_from = "ks") %>%
rename(
"Parameter" = "par",
"KS(adam, tmbstan)" = "adam",
"KS(aghq, tmbstan)" = "aghq",
"KS(TMB, tmbstan)" = "TMB",
)
ks_summary %>%
gt::gt() %>%
gt::fmt_number(
columns = starts_with("KS"),
decimals = 3
)
| Parameter | KS(adam, tmbstan) | KS(aghq, tmbstan) | KS(TMB, tmbstan) |
|---|---|---|---|
| beta_rho | 0.094 | 0.090 | 0.085 |
| beta_alpha | 0.115 | 0.101 | 0.103 |
| beta_lambda | 0.042 | 0.061 | 0.074 |
| beta_anc_rho | 0.043 | 0.105 | 0.090 |
| beta_anc_alpha | 0.088 | 0.044 | 0.025 |
| logit_phi_rho_x | 0.536 | 0.536 | 0.118 |
| log_sigma_rho_x | 0.646 | 0.646 | 0.195 |
| logit_phi_rho_xs | 0.510 | 0.510 | 0.111 |
| log_sigma_rho_xs | 0.613 | 0.613 | 0.203 |
| logit_phi_rho_a | 0.552 | 0.552 | 0.080 |
| log_sigma_rho_a | 0.585 | 0.585 | 0.113 |
| logit_phi_rho_as | 0.569 | 0.569 | 0.104 |
| log_sigma_rho_as | 0.589 | 0.589 | 0.134 |
| log_sigma_rho_xa | 0.669 | 0.669 | 0.224 |
| u_rho_x | 0.091 | 0.092 | 0.095 |
| us_rho_x | 0.064 | 0.062 | 0.062 |
| u_rho_xs | 0.123 | 0.117 | 0.121 |
| us_rho_xs | 0.045 | 0.037 | 0.044 |
| u_rho_a | 0.091 | 0.087 | 0.083 |
| u_rho_as | 0.094 | 0.084 | 0.076 |
| logit_phi_alpha_x | 0.531 | 0.531 | 0.107 |
| log_sigma_alpha_x | 0.557 | 0.557 | 0.143 |
| logit_phi_alpha_xs | 0.551 | 0.551 | 0.143 |
| log_sigma_alpha_xs | 0.540 | 0.540 | 0.159 |
| logit_phi_alpha_a | 0.525 | 0.525 | 0.083 |
| log_sigma_alpha_a | 0.541 | 0.541 | 0.111 |
| logit_phi_alpha_as | 0.516 | 0.516 | 0.081 |
| log_sigma_alpha_as | 0.514 | 0.514 | 0.098 |
| log_sigma_alpha_xa | 0.627 | 0.627 | 0.146 |
| u_alpha_x | 0.096 | 0.096 | 0.089 |
| us_alpha_x | 0.087 | 0.084 | 0.083 |
| u_alpha_xs | 0.109 | 0.101 | 0.096 |
| us_alpha_xs | 0.106 | 0.097 | 0.095 |
| u_alpha_a | 0.076 | 0.085 | 0.084 |
| u_alpha_as | 0.100 | 0.096 | 0.102 |
| u_alpha_xa | 0.064 | 0.069 | 0.059 |
| OmegaT_raw | 0.518 | 0.518 | 0.022 |
| log_betaT | 0.689 | 0.689 | 0.244 |
| log_sigma_lambda_x | 0.736 | 0.736 | 0.269 |
| ui_lambda_x | 0.159 | 0.158 | 0.160 |
| log_sigma_ancrho_x | 0.504 | 0.504 | 0.037 |
| log_sigma_ancalpha_x | 0.519 | 0.519 | 0.083 |
| ui_anc_rho_x | 0.048 | 0.055 | 0.057 |
| ui_anc_alpha_x | 0.066 | 0.063 | 0.065 |
| log_sigma_or_gamma | 0.524 | 0.524 | 0.036 |
| log_or_gamma | 0.045 | 0.059 | 0.060 |
ggplot(ks_summary, aes(x = `KS(TMB, tmbstan)`, y = `KS(aghq, tmbstan)`)) +
geom_point(alpha = 0.5) +
xlim(0, 1) +
ylim(0, 1) +
geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
labs(x = "KS(TMB, tmbstan)", y = "KS(aghq, tmbstan)") +
theme_minimal()
ks_summary %>%
mutate(`KS difference` = `KS(TMB, tmbstan)` - `KS(aghq, tmbstan)`) %>%
ggplot(aes(x = `KS difference`)) +
geom_boxplot(width = 0.5) +
coord_flip() +
labs(x = "KS(TMB, tmbstan) - KS(aghq, tmbstan)") +
theme_minimal()
The following parameters have KS values greater than 0.5 in both dimensions.
(big_ks <- ks_summary %>%
filter(`KS(aghq, tmbstan)` + `KS(TMB, tmbstan)` > 0.5) %>%
pull(Parameter))
## [1] "logit_phi_rho_x" "log_sigma_rho_x" "logit_phi_rho_xs" "log_sigma_rho_xs" "logit_phi_rho_a" "log_sigma_rho_a" "logit_phi_rho_as" "log_sigma_rho_as"
## [9] "log_sigma_rho_xa" "logit_phi_alpha_x" "log_sigma_alpha_x" "logit_phi_alpha_xs" "log_sigma_alpha_xs" "logit_phi_alpha_a" "log_sigma_alpha_a" "logit_phi_alpha_as"
## [17] "log_sigma_alpha_as" "log_sigma_alpha_xa" "OmegaT_raw" "log_betaT" "log_sigma_lambda_x" "log_sigma_ancrho_x" "log_sigma_ancalpha_x" "log_sigma_or_gamma"
#' To write!
Suppose we have two sets of samples from the posterior.
For each sample we are going to want to evaluate the log-likelihood, so that we can calculate the log-likelihood ratios.
We can extract the TMB objective function for the log-likelihood as follows:
tmb$fit$obj$fn()
## [1] NaN